#######################################################################
#Name of code file: voting_analysis.R

#Data In: yishuv_20.xlsx, yishuv_21.xlsx, yishuv_22.xlsx, yishuv_23.xlsx,
#kalpi21.xlsx, kalpi22.xlsx, kalpi23.xlsx, bycode2018.xlsx, clean_census.xlsx

#Data Out: Figures 4, A3, A4, A5, A6, A7, A8, A9; Tables 1, A1, A2, A3, A5, A6,
#A7, A8, A15, A16
######################################################################

## Set your working directory to the replication folder
rm(list = ls())
setwd("C:/Users/User/Desktop/EUI/2nd term/Replicating Research/10/BSS replication files/Weiss et al original files/doc_replication")

# ----------------------------------------------------------------------
# load all relevant packages
# ----------------------------------------------------------------------

## If you don't have packages installed, uncomment and install
## install.packages("tidyverse")
## install.packages("ggplot2")
## install.packages("stargazer")
## install.packages("xtable")
## install.packages("texreg")
## install.packages("readstata13")
## install.packages("estimatr")
## install.packages("naniar")
## install.packages("lfe")
## install.packages("multiwayvcov")
## install.packages("lmtest")
## install.packages("xlsx")
## install.packages("readxl")
## install.packages("RItools")
## install.packages("dummies")
## install.packages("data.table")
## install.packages("MatchIt")

library("tidyverse")
library("ggplot2")
library("stargazer")
library("xtable")
library("texreg")
library("readstata13")
library("estimatr")
library("naniar")
library("lfe")
library("multiwayvcov")
library("lmtest")
#library("xlsx") # Doesn't work
library("readxl")
library("RItools")
#library("dummies") # Doesn't work
library("data.table")
library("MatchIt")

## On the computer used to generate the results of this script, below is what is
## printed to the console when sessionInfo() is run
## sessionInfo()

## > sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.1

## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib

## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base

## other attached packages:
##  [1] MatchIt_4.3.2      data.table_1.14.2  dummies_1.5.6      RItools_0.1-17
##  [5] SparseM_1.81       readxl_1.3.1       xlsx_0.6.5         lmtest_0.9-39
##  [9] zoo_1.8-9          multiwayvcov_1.2.3 lfe_2.8-7.1        Matrix_1.3-4
## [13] naniar_0.6.1       estimatr_0.30.4    readstata13_0.10.0 texreg_1.37.5
## [17] xtable_1.8-4       stargazer_5.2.2    forcats_0.5.1      stringr_1.4.0
## [21] dplyr_1.0.7        purrr_0.3.4        readr_2.1.1        tidyr_1.1.4
## [25] tibble_3.1.6       ggplot2_3.3.5      tidyverse_1.3.1

## loaded via a namespace (and not attached):
##  [1] svd_0.5          httr_1.4.2       jsonlite_1.7.2   splines_4.1.2
##  [5] modelr_0.1.8     Formula_1.2-4    assertthat_0.2.1 xlsxjars_0.6.1
##  [9] cellranger_1.1.0 pillar_1.6.4     backports_1.4.0  lattice_0.20-45
## [13] glue_1.5.1       visdat_0.5.3     rvest_1.0.2      colorspace_2.0-2
## [17] sandwich_3.0-1   pkgconfig_2.0.3  broom_0.7.10     haven_2.4.3
## [21] scales_1.1.1     tzdb_0.2.0       generics_0.1.1   ellipsis_0.3.2
## [25] withr_2.4.3      cli_3.1.0        survival_3.2-13  magrittr_2.0.1
## [29] crayon_1.4.2     fs_1.5.2         fansi_0.5.0      xml2_1.3.3
## [33] tools_4.1.2      hms_1.1.1        lifecycle_1.0.1  munsell_0.5.0
## [37] reprex_2.0.1     compiler_4.1.2   rlang_0.4.12     grid_4.1.2
## [41] rstudioapi_0.13  boot_1.3-28      gtable_0.3.0     abind_1.4-5
## [45] DBI_1.1.1        R6_2.5.1         lubridate_1.8.0  utf8_1.2.2
## [49] rJava_1.0-6      stringi_1.7.6    parallel_4.1.2   Rcpp_1.0.7
## [53] vctrs_0.3.8      dbplyr_2.1.1     tidyselect_1.1.1

# ----------------------------------------------------------------------
# load 2019a, 2019b, and 2020 election data and booth data
# ----------------------------------------------------------------------

## Uncomment and set working directory to replication folder
## setwd("folder")

## Elections data
elec_2015 <- read_excel("data/yishuv_20.xlsx")
elec_2019a <- read_excel("data/yishuv_21.xlsx")
elec_2019b <- read_excel("data/yishuv_22.xlsx")
elec_2020 <- read_excel("data/yishuv_23.xlsx")

## Booth data
kalpi_2019a <- read_excel("data/kalpi21.xlsx")
kalpi_2019b <- read_excel("data/kalpi22.xlsx")
kalpi_2020 <- read_excel("data/kalpi23.xlsx")

# ----------------------------------------------------------------------
# Select change some of the variables in these datasets
# ----------------------------------------------------------------------

## For 2015 election (don't use this in most analyses, note that it does not
## contain vote totals for parties, which were different in 2015 compared to
## 2019-2020, so those are added in as NA values for those columns)
## Additionally, "cycle" is added to specify which cycle in the analysis (there
## are three main ones). For 2015 we mark this as "Zero" because it is not used
## except in the appendix
election_0 <- elec_2015 %>%
    mutate(labor = NA,
           arab_party = NA,
           likud = NA,
           kahol_lavan = NA,
           cycle = "Zero")

### 2019a - note that we add Hadash/Ta'al and Balad/Arab List together to be the
### equivalent of the "Arab parties" in 2019b and 2020
election_1 <- elec_2019a %>%
    mutate(arab_party = balad + hadash,
           cycle = "First") %>%
    dplyr::select(-balad, -hadash) %>%
    relocate(arab_party, .after = labor)

### 2019b
election_2 <- elec_2019b %>%
    mutate(cycle = "Second")

## 2020
election_3 <- elec_2020 %>%
    mutate(cycle = "Third")

# ----------------------------------------------------------------------
# Append election results for 21-22-23 and add locality type index
# ----------------------------------------------------------------------

## Note that for these main analyses we do not include the 2015 results in the
## same dataset

# merge
elections <- rbind(election_1, election_2, election_3)

# add index
yishuv_index <- read_excel("data/bycode2018.xlsx")
census_2008 <- read_excel("data/clean_census.xlsx")

# Bind index with election data
elections <- left_join(elections, yishuv_index, by="lcode")
elections <- left_join(elections, census_2008, by="lcode")

# ----------------------------------------------------------------------
# Create indicators for the "Meshulash" and for localities in Trump's plan
# ----------------------------------------------------------------------

## Defining the triangle locations mentioned in the peace plan based on locality
## codes; also defining ext_tri which is all triangle locations, not just those
## mentioned in the peace plan
elections <- elections %>%
    mutate(.,
           triangle = case_when(
               lcode == 654 ~ 1,
               lcode == 637 ~ 1,
               lcode == 6000 ~ 1,
               lcode == 2710 ~ 1,
               lcode == 638 ~ 1,
               lcode == 2730 ~ 1,
               lcode == 634 ~ 1,
               lcode == 2720 ~ 1,
               lcode == 633 ~ 1,
               lcode == 633 ~ 1,
               lcode == 627 ~ 1
           ),
           triangle = ifelse(is.na(triangle),0, triangle),
           non_jewish = case_when(
               locality_relig == 1 ~ 0,
               locality_relig > 1 ~ 1
           ),
           ext_tri = case_when(
               non_jewish == 1 & sub_district %in% c(32,41,42) ~ 1
           ),
           ext_tri = ifelse(is.na(ext_tri), 0, ext_tri)
           )


# Filter out Jewish and distant vilages from the extended tri indicator
elections <- elections %>%
 mutate(.,
        ext_tri = ifelse(lcode %in% c(1320, 1327, 537, 541), 0, ext_tri))

# What are triangle localities that are not mentioned in the deal
test <- elections %>%
 filter(.,
        ext_tri == 1 & triangle == 0)
table(test$locality)

# ----------------------------------------------------------------------
# Create Voting indicators
# ----------------------------------------------------------------------

elections <- elections %>%
 mutate(.,
        turnout = valid_vote/elig_voters,
        arab_vs = arab_party/valid_vote,
        likud_vs = likud/valid_vote,
        kl_vs = kahol_lavan/valid_vote,
        labor_vs = labor/valid_vote,
        post = ifelse(cycle == "Third",1, 0))

# Note that in the second cycle, in Kramim and Telem, two Jewish localities there are
# more voters than eligible voters. Since we focus on non-jewish localities this
# is not a concern.

# ----------------------------------------------------------------------
# Subset to non-Jewish Localities
# ----------------------------------------------------------------------

## Filter to those mixed or Arab
nj_elec <- elections %>%
 filter(.,
        locality_relig > 1)

## Non-Jewish sample in data.table format
nj_elec_alt <- as.data.table(nj_elec)

## In the full sample, there is a single location (מעטפות חיצוניות) that was
## marked as having infinite turnout - it is only a single location, and is only
## used in some analyses in the appendix
## Here, we save an alternative form of the dataset with this observation
## removed
elections_alt <- as.data.table(elections)
elections_alt <- elections_alt[!(elections$turnout == Inf), ]

# ----------------------------------------------------------------------
# Repeat data preparation for dataset including 2015 results but no party outcomes
# ----------------------------------------------------------------------

## The same steps as above, but here in a new dataset that includes 2015 and
## (therefore) does not have party outcome data for all the elections
elections_all <- rbind(election_0, election_1, election_2, election_3)
elections_all <- left_join(elections_all, yishuv_index, by="lcode")
elections_all <- left_join(elections_all, census_2008, by="lcode")
elections_all <- elections_all %>%
 mutate(.,
        triangle = case_when(
         lcode == 654 ~ 1,
         lcode == 637 ~ 1,
         lcode == 6000 ~ 1,
         lcode == 2710 ~ 1,
         lcode == 638 ~ 1,
         lcode == 2730 ~ 1,
         lcode == 634 ~ 1,
         lcode == 2720 ~ 1,
         lcode == 633 ~ 1,
         lcode == 633 ~ 1,
         lcode == 627 ~ 1
        ),
        triangle = ifelse(is.na(triangle),0, triangle),
        non_jewish = case_when(
         locality_relig == 1 ~ 0,
         locality_relig > 1 ~ 1
        ),
        ext_tri = case_when(
         non_jewish == 1 & sub_district %in% c(32,41,42) ~ 1
        ),
        ext_tri = ifelse(is.na(ext_tri), 0, ext_tri)
       )
elections_all <- elections_all %>%
 mutate(.,
        ext_tri = ifelse(lcode %in% c(1320, 1327, 537, 541), 0, ext_tri))
elections_all <- elections_all %>%
 mutate(.,
        turnout = valid_vote/elig_voters,
        arab_vs = arab_party/valid_vote,
        likud_vs = likud/valid_vote,
        kl_vs = kahol_lavan/valid_vote,
        labor_vs = labor/valid_vote,
        post = ifelse(cycle == "Third",1, 0),
        post_placebo = as.numeric(plyr::mapvalues(cycle, c("Zero", "First", "Second", "Third"),
                                                  c(1, 0, 0, NA))))
nj_elec_all <- elections_all %>%
 filter(.,
        locality_relig > 1)

# ----------------------------------------------------------------------
# Clean and tidy up the booth data in a similar manner
# ----------------------------------------------------------------------

                                        # First round - add cycle indicator
kalpi_2019a <- kalpi_2019a %>%
    mutate(cycle = 1)

                                        # Second round - add cycle indicator
kalpi_2019b <- kalpi_2019b %>%
    mutate(cycle = 2)

                                        # Third round - add cycle indicator
kalpi_2020 <- kalpi_2020 %>%
    mutate(cycle = 3)

##### bind three elections
kalpi_elec <- rbind(kalpi_2019a, kalpi_2019b, kalpi_2020)%>%
    mutate(count_kalpi = 1)
nrow(kalpi_elec)

## Marking what is triangle and what is not and summarizing booth counts by
## locality
kalpi <- kalpi_elec %>%
    group_by(lcode, cycle)
kalpi_sum <- summarize(kalpi,
                       n = sum(count_kalpi)) %>%
    left_join(yishuv_index) %>%
    mutate(triangle = case_when(
               lcode == 654 ~ 1,
               lcode == 637 ~ 1,
               lcode == 6000 ~ 1,
               lcode == 2710 ~ 1,
               lcode == 638 ~ 1,
               lcode == 2730 ~ 1,
               lcode == 634 ~ 1,
               lcode == 2720 ~ 1,
               lcode == 633 ~ 1,
               lcode == 633 ~ 1,
               lcode == 627 ~ 1),
           triangle = ifelse(is.na(triangle), 0, 1),
           type = case_when(
               triangle == 1 ~ "Triangle",
               triangle == 0 & locality_relig >1 ~ "Non-Jewish"
           )) %>%
    dplyr::select(type, n, lcode, cycle)

# ----------------------------------------------------------------------
# Check for balance on covariates between triangle and non-triangle localities
# ----------------------------------------------------------------------

## Creating dataset
data_for_balance <- filter(nj_elec, cycle == "First") %>%
  rename(Population_2018 = pop_2018,
         Houshold_Density = HousingDens_avg_08,
         Academic_Education  = AcadmCert_pcnt_08,
         Vehicle_Per_Family  = Vehicle1up_pcnt_08,
         Employment = Worked_08,
         Age_0_19 = age0_19sef_pcnt_08,
         Age_65 = age65sef_pcnt_08,
         Age_85 = age85sef_pcnt_08,
         Turnout = turnout
         )

## Running the omnibus balance test from the RItools package
balance <- xBalance(triangle ~ Population_2018 + Houshold_Density + Academic_Education +
             Vehicle_Per_Family + Employment + Age_0_19 + Age_65 + Age_85 + Turnout,
         na.rm = T,
         data=data_for_balance,
         report=c("adj.mean.diffs", "z.scores", "p.values",
                  "chisquare.test"))

## Get the table to save
table <- xtable(balance,
                caption = 'Balance of Triangle and Non-Triangle Localities',
                label = 'tab:balance')
print(table, file = "tables/table_A3.tex",
      caption.placement = "top",
      label = tbl:balance,
      floating = T)

# ----------------------------------------------------------------------
# Parallel Trends -- Turnout
# ----------------------------------------------------------------------

## Function to calculate the mean and the range for each cycle
calc_range <-function(x){
    n <- length(x)
    mean <-mean(x, na.rm = T)
    high <- sd(x, na.rm = T)*1.96/sqrt(n) + mean
    low <- mean - (sd(x, na.rm = T)*1.96/sqrt(n))
    result <-data.frame(high, mean, low)
    names(result) <-c("high", "mean", "low")
    return(result)
}

## 2015 cycle, first for triangle locations (tri) and then for non-triangle
## locations (non_tri)
zero_tri <- nj_elec_all %>%
    filter(.,
           cycle == "Zero",
           triangle == 1)
tri_0 <- calc_range(zero_tri$turnout)
tri_0$Cycle <- 0
tri_0$Condition <- "Triangle"
zero_non_tri <- nj_elec_all %>%
    filter(.,
           cycle == "Zero",
           triangle == 0)
non_tri_0 <- calc_range(zero_non_tri$turnout)
non_tri_0$Cycle <- 0
non_tri_0$Condition <- "Non-Triangle"

### first cycle
first_tri <- nj_elec %>%
 filter(.,
        cycle == "First",
        triangle == 1
        )
tri_a <- calc_range(first_tri$turnout)
tri_a$Cycle <- 1
tri_a$Condition <- "Triangle"
first_non_tri <- nj_elec %>%
 filter(.,
        cycle == "First",
        triangle == 0
        )
non_tri_a <- calc_range(first_non_tri$turnout)
non_tri_a$Cycle <- 1
non_tri_a$Condition <- "Non-Triangle"

### Second cycle
scnd_tri <- nj_elec %>%
 filter(.,
        cycle == "Second",
        triangle == 1
 )
tri_b <- calc_range(scnd_tri$turnout)
tri_b$Cycle <- 2
tri_b$Condition <- "Triangle"
scnd_non_tri <- nj_elec %>%
 filter(.,
        cycle == "Second",
        triangle == 0
        )
non_tri_b <- calc_range(scnd_non_tri$turnout)
non_tri_b$Cycle <- 2
non_tri_b$Condition <- "Non-Triangle"

### Third cycle
thrd_tri <- nj_elec %>%
 filter(.,
        cycle == "Third",
        triangle == 1
 )
tri_c <- calc_range(thrd_tri$turnout)
tri_c$Cycle <- 3
tri_c$Condition <- "Triangle"
thrd_non_tri <- nj_elec %>%
 filter(.,
        cycle == "Third",
        triangle == 0
 )
non_tri_c <- calc_range(thrd_non_tri$turnout)
non_tri_c$Cycle <- 3
non_tri_c$Condition <- "Non-Triangle"

## Combine all of these together
par_trend <- rbind(tri_a, non_tri_a, tri_b, non_tri_b, tri_c, non_tri_c)
par_trend_all <- rbind(tri_0, non_tri_0, tri_a, non_tri_a, tri_b, non_tri_b, tri_c,
                       non_tri_c)

## Create the parallel trends figure in the paper
ggplot(par_trend, aes(x = Cycle, y = mean, color = Condition, shape = Condition)) +
    geom_line()+
    geom_point() +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0) +
    scale_x_continuous(breaks = 1:3,
                       limits = c(1,3),
                       labels = paste0(c("First", "Second",
                                         "Third"))) +
    geom_vline(xintercept = 2.2, linetype="dotted",
               color = "grey", size=0.5) +
    annotate("text", x = 1.5, y = .42, label = "PRE",
             family = "Times") +
    annotate("text", x = 2.5, y = .42, label = "POST",
             family = "Times") +
    scale_color_manual(values = c("dodgerblue3", "firebrick3")) +
    labs(x = "Election Cycle",
         y = "Turnout",
         color = "", shape = "") +
    guides(colour = guide_legend(reverse=T), shape = guide_legend(reverse = TRUE)) +
    theme(text = element_text(size = 10, family = "Times"),
          legend.key=element_blank(),
          panel.grid.major = element_blank(),
          axis.text.x = element_text(size = 10),
          plot.caption = element_text(size = 10, family = "Times",hjust = -.02),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"))
ggsave("plots/figure4.pdf", width = 6.5, height = 4)

## Create parallel trends with 2015 included for the appendix
ggplot(par_trend_all, aes(x = Cycle, y = mean, color = Condition, shape = Condition)) +
    geom_line()+
    geom_point() +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0) +
    scale_x_continuous(breaks = 0:3,
                       labels = paste0(c("2015", "2019a",
                                         "2019b", "2020"))) +
    geom_vline(xintercept = 2.2, linetype="dotted",
               color = "grey", size=0.5) +
    annotate("text", x = 1.5, y = .42, label = "PRE",
             family = "Times") +
    annotate("text", x = 2.5, y = .42, label = "POST",
             family = "Times") +
    scale_color_manual(values = c("dodgerblue3", "firebrick3")) +
    labs(x = "Election Cycle",
         y = "Turnout",
         color = "", shape = "") +
    guides(colour = guide_legend(reverse=T), shape = guide_legend(reverse = TRUE)) +
    theme(text = element_text(size = 10, family = "Times"),
          legend.key=element_blank(),
          panel.grid.major = element_blank(),
          axis.text.x = element_text(size = 10),
          plot.caption = element_text(size = 10, family = "Times",hjust = -.02),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"))
ggsave("plots/figure_A4.pdf", width = 6.5, height = 4)

# ----------------------------------------------------------------------
# turnout Parallel Trends Full Sample
# ----------------------------------------------------------------------

### first cycle
first_tri <- filter(elections_alt, cycle == "First", triangle == 1)
tri_a <- calc_range(first_tri$turnout)
tri_a$Cycle <- 1
tri_a$Condition <- "Triangle"
first_non_tri <- filter(elections_alt, cycle == "First", triangle == 0)
non_tri_a <- calc_range(first_non_tri$turnout)
non_tri_a$Cycle <- 1
non_tri_a$Condition <- "Non-Triangle"

### Second cycle
scnd_tri <- filter(elections_alt, cycle == "Second", triangle == 1)
tri_b <- calc_range(scnd_tri$turnout)
tri_b$Cycle <- 2
tri_b$Condition <- "Triangle"
scnd_non_tri <- filter(elections_alt, cycle == "Second", triangle == 0)
non_tri_b <- calc_range(scnd_non_tri$turnout)
non_tri_b$Cycle <- 2
non_tri_b$Condition <- "Non-Triangle"

### Third cycle
thrd_tri <- filter(elections_alt, cycle == "Third", triangle == 1)
tri_c <- calc_range(thrd_tri$turnout)
tri_c$Cycle <- 3
tri_c$Condition <- "Triangle"
thrd_non_tri <- filter(elections_alt, cycle == "Third", triangle == 0)
non_tri_c <- calc_range(thrd_non_tri$turnout)
non_tri_c$Cycle <- 3
non_tri_c$Condition <- "Non-Triangle"

## Combine into a single dataset
par_trend <- rbind(tri_a, non_tri_a, tri_b, non_tri_b, tri_c, non_tri_c)
mylabs <- c("First", "Second", "Third")
par_trend$Cycle <- plyr::mapvalues(par_trend$Cycle, 1:3, mylabs)
par_trend$Cycle <- factor(par_trend$Cycle, mylabs)

## Now the new parallel trends graph
pd <- position_dodge(width = 0.1)
ggplot(par_trend, aes(x = Cycle, y = mean, color = Condition, shape = Condition,
                      group = Condition)) +
    geom_line(position = pd) +
    geom_point(position = pd) +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0, position = pd) +
    labs(x = "Election Cycle",
         y = "Turnout",
         color = "", shape = "") +
    geom_vline(xintercept = 2.2, linetype="dotted",
               color = "grey", size=0.5) +
    annotate("text", x = 1.5, y = .42, label = "PRE",
             family = "Times") +
    annotate("text", x = 2.5, y = .42, label = "POST",
             family = "Times") +
    guides(colour = guide_legend(reverse=TRUE), shape = guide_legend(reverse = TRUE)) +
    scale_color_manual(values = c("dodgerblue3", "firebrick3")) +
    theme(text = element_text(size = 10, family = "Times"),
          legend.key=element_blank(),
          panel.grid.major = element_blank(),
          axis.text.x = element_text(size = 10),
          plot.caption = element_text(size = 10, family = "Times",hjust = -.02),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"))
ggsave("plots/figure_A5.png", width = 6.5, height = 4)

# ----------------------------------------------------------------------
# Parallel Trends for Blue-White Party
# ----------------------------------------------------------------------

## Labels for cycles
cycle_labs <- c("First", "Second", "Third")
treat_labs <- c("Non-Triangle", "Triangle")

## Get mean, upper, and lower bounds by cycle and treatment (basically the
## calc_range command but in data.table format)
para_vs_nj <- nj_elec_alt[, list(mean = mean(kl_vs, na.rm = TRUE),
                                 high = sd(kl_vs, na.rm = TRUE) * 1.96 / sqrt(.N) +
                                     mean(kl_vs, na.rm = TRUE),
                                 low = mean(kl_vs, na.rm = TRUE) -
                                     sd(kl_vs, na.rm = TRUE) * 1.96 / sqrt(.N)),
                          by = list(cycle, triangle)]

## Format variable
para_vs_nj[, triangle := plyr::mapvalues(triangle, 0:1, treat_labs)]

## Plot
pd <- position_dodge(width = 0.1)
ggplot(para_vs_nj, aes(x = cycle, y = mean, color = triangle, shape = triangle, group = triangle)) +
    geom_line(position = pd) +
    geom_point(position = pd) +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0, position = pd) +
    labs(x = "Election Cycle",
         y = "Blue-White Vote Share",
         color = "", shape = "") +
    geom_vline(xintercept = 2.2, linetype = "dotted", color = "grey", size = 0.5) +
    annotate("text", x = 1.5, y = .42, label = "PRE", family = "Times") +
    annotate("text", x = 2.5, y = .42, label = "POST", family = "Times") +
    guides(colour = guide_legend(reverse = T), shape = guide_legend(reverse = TRUE)) +
    scale_color_manual(values = c("dodgerblue3", "firebrick3")) +
    theme(text = element_text(size = 10, family = "Times"),
          legend.key=element_blank(),
          panel.grid.major = element_blank(),
          axis.text.x = element_text(size = 10),
          plot.caption = element_text(size = 10, family = "Times",hjust = -.02),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"))
ggsave("plots/figure_A6.png", width = 6.5, height = 4)

# ----------------------------------------------------------------------
# Parallel Trends for Labor Party
# ----------------------------------------------------------------------

## Get mean, upper, and lower bounds by cycle and treatment
para_vs_labor_nj <- nj_elec_alt[, list(mean = mean(labor_vs, na.rm = TRUE),
                                 high = sd(labor_vs, na.rm = TRUE) * 1.96 / sqrt(.N) +
                                   mean(labor_vs, na.rm = TRUE),
                                 low = mean(labor_vs, na.rm = TRUE) -
                                   sd(labor_vs, na.rm = TRUE) * 1.96 / sqrt(.N)),
                          by = list(cycle, triangle)]

## Format variable
para_vs_labor_nj[, triangle := plyr::mapvalues(triangle, 0:1, treat_labs)]

## Plot
pd <- position_dodge(width = 0.1)
ggplot(para_vs_labor_nj, aes(x = cycle, y = mean, color = triangle, shape = triangle, group = triangle)) +
    geom_line(position = pd) +
    geom_point(position = pd) +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0, position = pd) +
    labs(x = "Election Cycle",
         y = "Labor Vote Share",
         color = "", shape = "") +
    geom_vline(xintercept = 2.2, linetype = "dotted", color = "grey", size = 0.5) +
    annotate("text", x = 1.5, y = .42, label = "PRE", family = "Times") +
    annotate("text", x = 2.5, y = .42, label = "POST", family = "Times") +
    guides(colour = guide_legend(reverse = T), shape = guide_legend(reverse = TRUE)) +
    scale_color_manual(values = c("dodgerblue3", "firebrick3")) +
    theme(text = element_text(size = 10, family = "Times"),
          legend.key=element_blank(),
          panel.grid.major = element_blank(),
          axis.text.x = element_text(size = 10),
          plot.caption = element_text(size = 10, family = "Times",hjust = -.02),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"))
ggsave("plots/figure_A8.png", width = 6.5, height = 4)

# ----------------------------------------------------------------------
# Parallel Trends for Likud Party
# ----------------------------------------------------------------------

## Get mean, upper, and lower bounds by cycle and treatment
para_vs_likud_nj <- nj_elec_alt[, list(mean = mean(likud_vs, na.rm = TRUE),
                                 high = sd(likud_vs, na.rm = TRUE) * 1.96 / sqrt(.N) +
                                   mean(likud_vs, na.rm = TRUE),
                                 low = mean(likud_vs, na.rm = TRUE) -
                                   sd(likud_vs, na.rm = TRUE) * 1.96 / sqrt(.N)),
                          by = list(cycle, triangle)]

## Format variable
para_vs_likud_nj[, triangle := plyr::mapvalues(triangle, 0:1, treat_labs)]

## Plot
pd <- position_dodge(width = 0.1)
ggplot(para_vs_likud_nj, aes(x = cycle, y = mean, color = triangle, shape = triangle,group = triangle)) +
    geom_line(position = pd) +
    geom_point(position = pd) +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0, position = pd) +
    labs(x = "Election Cycle",
         y = "Likud Vote Share",
         color = "", shape = "") +
    geom_vline(xintercept = 2.2, linetype = "dotted", color = "grey", size = 0.5) +
    annotate("text", x = 1.5, y = .42, label = "PRE", family = "Times") +
    annotate("text", x = 2.5, y = .42, label = "POST", family = "Times") +
    guides(colour = guide_legend(reverse = T), shape = guide_legend(reverse = TRUE)) +
    scale_color_manual(values = c("dodgerblue3", "firebrick3")) +
    theme(text = element_text(size = 10, family = "Times"),
          legend.key=element_blank(),
          panel.grid.major = element_blank(),
          axis.text.x = element_text(size = 10),
          plot.caption = element_text(size = 10, family = "Times",hjust = -.02),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"))
ggsave("plots/figure_A9.png", width = 6.5, height = 4)

# ----------------------------------------------------------------------
# Parallel Trends for Arab Party
# ----------------------------------------------------------------------

## Get mean, upper, and lower bounds by cycle and treatment
para_vs_arab_nj <- nj_elec_alt[, list(mean = mean(arab_vs, na.rm = TRUE),
                                       high = sd(arab_vs, na.rm = TRUE) * 1.96 / sqrt(.N) +
                                         mean(arab_vs, na.rm = TRUE),
                                       low = mean(arab_vs, na.rm = TRUE) -
                                         sd(arab_vs, na.rm = TRUE) * 1.96 / sqrt(.N)),
                                by = list(cycle, triangle)]

## Format variable
para_vs_arab_nj[, triangle := plyr::mapvalues(triangle, 0:1, treat_labs)]

## Plot
pd <- position_dodge(width = 0.1)
ggplot(para_vs_arab_nj, aes(x = cycle, y = mean, color = triangle, shape = triangle,group = triangle)) +
    geom_line(position = pd) +
    geom_point(position = pd) +
    geom_errorbar(aes(ymin = low, ymax = high), width = 0, position = pd) +
    labs(x = "Election Cycle",
         y = "Arab Vote Share",
         color = "", shape = "") +
    geom_vline(xintercept = 2.2, linetype = "dotted", color = "grey", size = 0.5) +
    annotate("text", x = 1.5, y = .42, label = "PRE", family = "Times") +
    annotate("text", x = 2.5, y = .42, label = "POST", family = "Times") +
    guides(colour = guide_legend(reverse = T), shape = guide_legend(reverse = TRUE)) +
    scale_color_manual(values = c("dodgerblue3", "firebrick3")) +
    theme(text = element_text(size = 10, family = "Times"),
          legend.key=element_blank(),
          panel.grid.major = element_blank(),
          axis.text.x = element_text(size = 10),
          plot.caption = element_text(size = 10, family = "Times",hjust = -.02),
          panel.grid.minor = element_blank(),
          panel.background = element_blank(),
          axis.line = element_line(colour = "black"))
ggsave("plots/figure_A7.png", width = 6.5, height = 4)

# ----------------------------------------------------------------------
# Descriptive Statistics
# ----------------------------------------------------------------------

## Selecting variables from the non-Jewish sample
disc_table_nj <- dplyr::select(nj_elec,
                     c(turnout, arab_vs, likud_vs, kl_vs,
                       triangle, ext_tri, pop_2018, age0_19sef_pcnt_08,
                       age65sef_pcnt_08, age85sef_pcnt_08,
                       AcadmCert_pcnt_08, Worked_08, HousingDens_avg_08,
                       Vehicle1up_pcnt_08, ChldBorn_avg_08))

## Outputting in table
stargazer(as.data.frame(disc_table_nj),
          covariate.labels = c("Turnout", "Arab Joint List VS", "Likud VS", "Blue-White VS",
                               "Triangle", "Extended Triangle",  "Population 2018",
                               "Perc. Age 0-19", "Perc. Age 65+", "Perc. Age 85+",
                               "Perc. Academic", "Perc. Employed", "Housing Density",
                               "HH with Vehicle", "Average Children per Women"),
          title = "Descriptive Statistics - Non Jewish Localities",
          label = "tab:dsc",
          omit.summary.stat = c("p25", "p75"),
          style = "qje",
          notes = c("All variables starting with 'Perc. Age 0-19' are from the 2008 census."),
          notes.append = T,
          notes.align = "l",
          out="tables/table_A1.tex")

## Selecting variables from the full sample
disc_table_all <- dplyr::select(elections,
                                c(turnout, arab_vs, likud_vs, kl_vs,
                                  triangle, pop_2018, age0_19sef_pcnt_08,
                                  age65sef_pcnt_08, age85sef_pcnt_08,
                                  AcadmCert_pcnt_08, Worked_08, HousingDens_avg_08,
                                  Vehicle1up_pcnt_08, ChldBorn_avg_08)) %>%
    filter(.,
           !is.infinite(turnout)) #here removing location with infinite turnout

## Outputting in table
stargazer(as.data.frame(disc_table_all),
          covariate.labels = c("Turnout", "Arab Joint List VS", "Likud VS", "Blue-White VS",
                               "Triangle",  "Population 2018",
                               "Perc. Age 0-19", "Perc. Age 65+", "Perc. Age 85+",
                               "Perc. Academic", "Perc. Employed", "Housing Density",
                               "HH with Vehicle", "Average Children per Women"),
          title = "Descriptive Statistics - All Localities",
          label = "tab:dsc_all",
          style = "qje",
          omit.summary.stat = c("p25", "p75"),
          notes = c("All variables starting with 'Perc. Age 0-19' are from the 2008 census."),
          notes.append = T,
          notes.align = "l",
          out="tables/table_A2.tex")

# ----------------------------------------------------------------------
# Turnout Models
# ----------------------------------------------------------------------

## Table 1 model 1: Main model, population control and clustered by locality
tur_pop <- felm(turnout ~ triangle * post + pop_2018 | 0 | 0 |lcode,
            data = nj_elec,
            exactDOF = TRUE,
            keepCX = TRUE)
summary(tur_pop)

## Table 1 model 2: Main model but with expanded definition of triangle and
## locality and cycle fixed effects
ext_tri <- nj_elec %>%
 mutate(.,
        triangle = ext_tri) %>%
 felm(turnout ~ triangle * post | cycle + lcode | 0 |lcode,
                data = .,
                exactDOF = TRUE,
                keepCX = TRUE)
summary(ext_tri)

## Table 1 model 3: Main model with locality and cycle fixed effects
tur_pop_lfe <- felm(turnout ~ triangle * post | cycle + lcode | 0 | lcode,
                    data = nj_elec,
                    exactDOF = TRUE,
                    keepCX = TRUE)
summary(tur_pop_lfe)

## Table 1 model 4: Full sample model with locality fixed effects
tur_national_lfe <- felm(turnout ~ triangle * post | cycle + lcode | 0 | lcode,
                    data = elections_alt,
                    exactDOF = TRUE,
                    keepCX = TRUE)
summary(tur_national_lfe)

## Some coefficients drop because of fixed effects, but the SE is still there
## and prints when using stargazer - this code provides an edited list of the
## SEs to use when producing a stargazer table
se_list <- lapply(list(tur_pop, tur_pop_lfe, ext_tri, tur_national_lfe),
                  function(x) coefficients(summary(x))[, "Cluster s.e."])
se_list[[2]][1:2] <- NA
se_list[[3]][1:2] <- NA
se_list[[4]][1:2] <- NA

## Save in a table
stargazer(tur_pop, tur_pop_lfe, ext_tri, tur_national_lfe,
          out = "tables/table1.tex",  style = "qje",
          title = "Deal of the Century Effect on Turnout",
          omit = c("Constant", "pop_2018"),
          dep.var.labels = c("Turnout"),
          covariate.labels = c("Triangle", "Post", "Triangle * Post"),
          omit.stat = c("rsq", "f", "ser", "adj.rsq"),
          ci = FALSE,
          se = se_list,
          omit.table.layout = "n",
          column.sep.width = "-6pt",
          star.cutoffs = NULL,
          label = "tab:main_did",
          star.char = c("", "", ""),
          add.lines = list(
              c("Pop Control",  "Yes", "No", "No", "No"),
              c("Cycle FE",  "No", "Yes",  "Yes", "Yes"),
              c("Locality FE", "No", "Yes", "Yes", "Yes"),
              c("Cluster",  "Locality", "Locality",  "Locality", "Locality"),
              c("Sample",  "Non-Jewish", "Non-Jewish", "Non-Jewish", "Full"),
              c("Treatment", "10 Localities", "10 Localities", "16 Localities",
                "10 Localities"),
              c("Pre-Register", "Yes", "No", "No", "No")))

# ----------------------------------------------------------------------
# Party Vote Share Models
# ----------------------------------------------------------------------

## Joint List share
arab_vote <- felm(arab_vs ~ triangle * post + pop_2018 + cycle | 0 | 0 | lcode,
                  data = nj_elec,
                  exactDOF = TRUE,
                  keepCX = TRUE)
summary(arab_vote)

## Likud share
likud_vote <- felm(likud_vs ~ triangle * post + pop_2018 + cycle | 0 | 0 | lcode,
                   data = nj_elec,
                   exactDOF = TRUE,
                   keepCX = TRUE)
summary(likud_vote)

## Blue-White share
kl_vote <- felm(kl_vs ~ triangle * post + pop_2018 + cycle | 0 | 0 | lcode,
                data = nj_elec,
                exactDOF = TRUE,
                keepCX = TRUE)
summary(kl_vote)

## Print out the table
stargazer(arab_vote, likud_vote, kl_vote,
          out = "tables/table_A7.tex",  style="qje",
          title = "Deal of the Century Effect on Party Vote Share",
          omit = c("Constant", "cycleThird", "cycleSecond", "pop_2018"),
          dep.var.labels = c("Joint List", "Likud", "Blue-White"),
          covariate.labels = c("Triangle", "Post", "Triangle * Post"),
          omit.stat = c("rsq", "f", "ser", "adj.rsq"),
          ci = FALSE,
          omit.table.layout = "n",
          column.sep.width = "-6pt",
          star.cutoffs = NULL,
          label = "tab:party_did",
          star.char = c("", "", ""),
          add.lines = list(
              c("Population Controls",  "Yes", "Yes", "Yes"),
              c("Cycle FE",  "Yes", "Yes", "Yes"),
              c("Cluster",  "Locality", "Locality", "Locality"),
              c("Sample",  "Non-Jewish", "Non-Jewish", "Non-Jewish"),
              c("Pre-Register",  "No", "No", "No")))

## Also want a table for the appendix that includes these models with locality
## fixed effects

## Joint List share
arab_vote_lfe <- felm(arab_vs ~ triangle * post | lcode + cycle | 0 | lcode,
                      data = nj_elec,
                      exactDOF = TRUE,
                      keepCX = TRUE)
summary(arab_vote_lfe)

## Likud share
likud_vote_lfe <- felm(likud_vs ~ triangle * post | lcode + cycle | 0 | lcode,
                       data = nj_elec,
                       exactDOF = TRUE,
                       keepCX = TRUE)
summary(likud_vote_lfe)

## Blue-White share
kl_vote_lfe <- felm(kl_vs ~ triangle * post | lcode + cycle | 0 | lcode,
                    data = nj_elec,
                    exactDOF = TRUE,
                    keepCX = TRUE)
summary(kl_vote_lfe)

## Print out the table
stargazer(arab_vote_lfe, likud_vote_lfe, kl_vote_lfe,
          out = "tables/table_A8.tex",  style="qje",
          title = "Deal of the Century Effect on Party Vote Share",
          omit = c("Constant"),
          dep.var.labels = c("Joint List", "Likud", "Blue-White"),
          covariate.labels = c("Triangle", "Post", "Triangle * Post"),
          omit.stat = c("rsq", "f", "ser", "adj.rsq"),
          ci = FALSE,
          omit.table.layout = "n",
          column.sep.width = "-6pt",
          star.cutoffs = NULL,
          label = "tab:party_did_lfe",
          star.char = c("", "", ""),
          add.lines = list(
              c("Cycle FE",  "Yes", "Yes", "Yes"),
              c("Locality FE", "Yes", "Yes", "Yes"),
              c("Cluster",  "Locality", "Locality", "Locality"),
              c("Sample",  "Non-Jewish", "Non-Jewish", "Non-Jewish"),
              c("Pre-Register",  "No", "No", "No")))

# ----------------------------------------------------------------------
# Including 2008 census controls
# ----------------------------------------------------------------------

## Couple of controls
turn_demog_1 <- felm(turnout ~ triangle * post + cycle+
                       AcadmCert_pcnt_08 + Worked_08
                     | 0 | 0 |lcode,
                     data = nj_elec,
                     exactDOF = TRUE,
                     keepCX = TRUE)
summary(turn_demog_1)

## Household variables
turn_demog_2 <- felm(turnout ~ triangle * post + cycle +
                      AcadmCert_pcnt_08 + Worked_08 +
                      HousingDens_avg_08 + Vehicle1up_pcnt_08 + ChldBorn_avg_08
                     | 0 | 0 |lcode,
                     data = nj_elec,
                     exactDOF = TRUE,
                     keepCX = TRUE)
summary(turn_demog_2)

## Age variables
turn_demog_3 <- felm(turnout ~ triangle * post  + cycle +
                       AcadmCert_pcnt_08 + Worked_08 +
                       HousingDens_avg_08 + Vehicle1up_pcnt_08 + ChldBorn_avg_08+
                       age0_19sef_pcnt_08 + age65sef_pcnt_08 + age85sef_pcnt_08
                      | 0 | 0 |lcode,
                      data = nj_elec,
                      exactDOF = TRUE,
                      keepCX = TRUE)
summary(turn_demog_3)

## Same but in the full sample
turn_demog_4 <- felm(turnout ~ triangle * post + cycle+
                      AcadmCert_pcnt_08 + Worked_08
                     | 0 | 0 |lcode,
                     data = elections,
                     exactDOF = TRUE,
                     keepCX = TRUE)
summary(turn_demog_4)

turn_demog_5 <- felm(turnout ~ triangle * post + cycle +
                      AcadmCert_pcnt_08 + Worked_08 +
                      HousingDens_avg_08 + Vehicle1up_pcnt_08 + ChldBorn_avg_08
                     | 0 | 0 |lcode,
                     data = elections,
                     exactDOF = TRUE,
                     keepCX = TRUE)
summary(turn_demog_5)
turn_demog_6 <- felm(turnout ~ triangle * post  + cycle +
                      AcadmCert_pcnt_08 + Worked_08 +
                      HousingDens_avg_08 + Vehicle1up_pcnt_08 + ChldBorn_avg_08+
                      age0_19sef_pcnt_08 + age65sef_pcnt_08 + age85sef_pcnt_08
                     | 0 | 0 |lcode,
                     data = elections,
                     exactDOF = TRUE,
                     keepCX = TRUE)
summary(turn_demog_6)

# ## Save in table
# stargazer(turn_demog_1, turn_demog_2, turn_demog_3,
#           turn_demog_4, turn_demog_5, turn_demog_6,
#           out= "tables/table_A5.tex",  style="qje",
#           title="Deal of the Century Effect on Turnout (2008 Census Covariates)",
#           order = c(1,2,13,3:20),
#           omit = c("Constant", "cycleThird", "cycleSecond"),
#           dep.var.labels = c("Turnout"),
#           covariate.labels = c("Triangle", "Post", "Triangle * Post",
#                                "Perc. Academic", "Perc. Employed", "Housing Density",
#                                "HH with Vehicle", "Average Children per Women",
#                                "Perc. Age 0-19", "Perc. Age 65+",
#                                "Perc. Age 85+"),
#           omit.stat = c("rsq", "f", "ser", "adj.rsq"),
#           ci=FALSE,
#           omit.table.layout = "n",
#           column.sep.width = "-6pt",
#           star.cutoffs = NULL,
#           label = "tab:rob_cov",
#           star.char = c("", "", ""),
#           add.lines = list(
#            c("Cycle FE",  "Yes", "Yes", "Yes","Yes", "Yes", "Yes"),
#            c("Cluster",  "Locality", "Locality", "Locality","Locality", "Locality", "Locality"),
#            c("Sample",  "Non-Jewish", "Non-Jewish", "Non-Jewish", "All", "All", "All"),
#            c("Pre-Register",  "No", "No", "No",
#              "No", "No")))

# ----------------------------------------------------------------------
# Excluding Jerusalem
# ----------------------------------------------------------------------

## Removing Jerusalem
names(nj_elec)
`%nin%` = Negate(`%in%`)
nj_elec1 <- nj_elec %>%
    filter(.,
           locality_english %nin% c("JERUSALEM"))

## with population control
tur_nj <- felm(turnout ~ triangle * post + pop_2018| 0 | 0 |lcode,
            data = nj_elec1,
            exactDOF = TRUE,
            keepCX = TRUE)
summary(tur_nj)

## with pop control and cycle fe
tur_nj_cyc <- felm(turnout ~ triangle * post + pop_2018 +cycle | 0 | 0 |lcode,
               data = nj_elec1,
               exactDOF = TRUE,
               keepCX = TRUE)
summary(tur_nj_cyc)

## with pop control, cycle fe, and edu control
tur_edu_nj <- felm(turnout ~ triangle * post + pop_2018 + cycle +
                    AcadmCert_pcnt_08| 0 | 0 |lcode,
                data = nj_elec1,
                exactDOF = TRUE,
                keepCX = TRUE)

summary(tur_edu_nj)

## with locality and cycle clustered SEs
tur_pop_clus2_nj <- felm(turnout ~ triangle * post + pop_2018 +cycle | 0 | 0 |lcode + cycle,
                      data = nj_elec1,
                      exactDOF = TRUE,
                      keepCX = TRUE)

summary(tur_pop_clus2_nj)

## Output tables
stargazer(tur_nj, tur_nj_cyc, tur_edu_nj,
          tur_pop_clus2_nj,
          out= "tables/table_A6.tex",  style="qje",
          title="Deal of the Century Effect on Turnout (No Jerusalem)",
          omit = c("Constant", "cycleThird", "cycleSecond", "pop_2018","AcadmCert_pcnt_08"),
          dep.var.labels = c("Turnout"),
          covariate.labels = c("Triangle", "Post", "Triangle * Post"),
          omit.stat = c("rsq", "f", "ser", "adj.rsq"),
          ci=FALSE,
          omit.table.layout = "n",
          column.sep.width = "-6pt",
          star.cutoffs = NULL,
          label = "tab:jlm_did",
          star.char = c("", "", ""),
          add.lines = list(
           c("Population Controls",  "Yes", "Yes", "Yes","Yes", "Yes"),
           c("Edu Control",  "No", "No", "Yes", "Yes", "No"),
           c("Cycle FE",  "No", "Yes", "Yes", "Yes", "Yes"),
           c("Cluster",  "Locality", "Locality", "Locality", "Locality + Cycle"),
           c("Sample",  "Non-Jewish", "Non-Jewish", "Non-Jewish", "Non-Jewish"),
           c("Pre-Register",  "No", "No", "No", "No")))

# ----------------------------------------------------------------------
# Booth data analysis
# ----------------------------------------------------------------------

## Produce data at the cycle + triangle/non-triangle level for the plot
k <- kalpi_sum %>%
 group_by(cycle, type) %>%
 summarise(.,
           n = n()) %>%
 filter(.,
        !is.na(type))

## Plot
ggplot(k, aes(x = cycle, y = n, color = type, shape = type)) +
 geom_line()+
 geom_point() +
 labs(x = "Election Cycle",
      y = "Number of Voting Stations per Locality",
      color = "", shape = "") +
 scale_color_manual(values = c("dodgerblue3", "firebrick3")) +
 scale_x_continuous(breaks = 1:3,
                    limits = c(1,3),
                    labels = paste0(c("First", "Second",
                                      "Third"))) +
    guides(colour = guide_legend(reverse = T), shape = guide_legend(reverse = TRUE)) +
    theme(text = element_text(size = 10, family = "Times"),
       legend.key=element_blank(),
       panel.grid.major = element_blank(),
       axis.text.x = element_text(size = 10),
       plot.caption = element_text(size = 10, family = "Times",hjust = -.02),
       panel.grid.minor = element_blank(),
       panel.background = element_blank(),
       axis.line = element_line(colour = "black"))
ggsave("plots/figure_A3.pdf", width = 6.5, height = 4)

# ----------------------------------------------------------------------
# Matching for the social media analysis
# ----------------------------------------------------------------------

## This is for the social media analysis, but uses the voting data so including
## it in this script

## Want to match on turnout in rounds 1 and 2 as well as population data, but
## only among Arab localities

## Which locality_relig indicator is for Arab communities, mixed communities,
## etc.?
nj_elec$locality_english[nj_elec$locality_relig == 2]
nj_elec$locality_english[nj_elec$locality_relig == 3]
nj_elec$locality_english[nj_elec$locality_relig == 4]

## 3 = Bedouin villages
## 4 = mixed localities

## So will want to restrict matching to locality_relig == 2

## Restricting to non-Bedouin Arab localities and getting data in wide format
matchdat <- nj_elec %>%
    filter(locality_relig == 2 & cycle %in% c("First", "Second")) %>%
    dplyr::select(lcode, pop_2018, cycle, triangle, turnout) %>%
    spread(cycle, turnout)

## Matching
mymatch1 <- matchit(formula = triangle ~ pop_2018 + First + Second,
                    data = matchdat,
                    method = "nearest",
                    distance = "mahalanobis")
mymatch2 <- matchit(formula = triangle ~ pop_2018 + First + Second,
                    data = matchdat,
                    method = "nearest",
                    distance = "logit")

## Matches (by row number in matchdat)
matchloc_tri <- matchdat$lcode[as.numeric(row.names(mymatch1$match.matrix))]
matchloc_nontri <- matchdat$lcode[as.numeric(mymatch1$match.matrix[, 1])]
match_english <- data.table(
    "Triangle" = plyr::mapvalues(matchloc_tri, yishuv_index$lcode, yishuv_index$locality_english),
    "Non-Triangle (Match)" = plyr::mapvalues(matchloc_nontri, yishuv_index$lcode, yishuv_index$locality_english)
)

## Table to show matching results
matchtab <- data.table(
    "Variable" = c("2018 Population", "Turnout, April 2019",
                   "Turnout, September 2019"),
    "Original" = summary(mymatch1)$sum.all[, 3],
    "Mahalanobis" = summary(mymatch1)$sum.matched[, 3],
    "PSM (Logit)" = summary(mymatch2)$sum.matched[2:4, 3]
)
t <- stargazer(matchtab,
               style = "qje",
               summary = FALSE,
               rownames = FALSE,
               title = "Matching Results, Standardized Mean Difference",
               label = "tab:matching")
t <- gsub("\\$\\$\\-\\$", "\\$\\-", t) # get rid of dollar signs around minus
write.table(t, file = "tables/table_A15.tex",
            row.names = FALSE, col.names = FALSE, quote = FALSE)


## Write matches to file
stargazer(match_english,
          style = "qje",
          summary = FALSE,
          rownames = TRUE,
          title = "Triangle and Matched Non-Triangle Localities",
          label = "tab:matched_local",
          out = "tables/table_A16.tex")










#################### ---------- BSS -----------------------###############################################################################
###
###
###

library(fixest)

####### Voting Mobilization
### Checking parallel trends for Model 3 in the original Table 1

## Function to calculate the mean and the range for each cycle
calc_range <- function(x) {
  n <- length(x)
  mean <- mean(x, na.rm = TRUE)
  high <- sd(x, na.rm = TRUE) * 1.96 / sqrt(n) + mean
  low <- mean - (sd(x, na.rm = TRUE) * 1.96 / sqrt(n))
  result <- data.frame(high, mean, low)
  names(result) <- c("high", "mean", "low")
  return(result)
}

zero_ext_tri_x1 <- nj_elec_all %>%
  filter(.,
         cycle == "Zero",
         ext_tri == 1)
tri_0_x1 <- calc_range(zero_ext_tri_x1$turnout)
tri_0_x1$Cycle <- 0
tri_0_x1$Condition <- "Extended Triangle"

zero_non_ext_tri_x1 <- nj_elec_all %>%
  filter(.,
         cycle == "Zero",
         ext_tri == 0)
non_tri_0_x1 <- calc_range(zero_non_ext_tri_x1$turnout)
non_tri_0_x1$Cycle <- 0
non_tri_0_x1$Condition <- "Non-Triangle"

### First cycle
first_ext_tri_x1 <- nj_elec %>%
  filter(.,
         cycle == "First",
         ext_tri == 1)
tri_a_x1 <- calc_range(first_ext_tri_x1$turnout)
tri_a_x1$Cycle <- 1
tri_a_x1$Condition <- "Extended Triangle"

first_non_ext_tri_x1 <- nj_elec %>%
  filter(.,
         cycle == "First",
         ext_tri == 0)
non_tri_a_x1 <- calc_range(first_non_ext_tri_x1$turnout)
non_tri_a_x1$Cycle <- 1
non_tri_a_x1$Condition <- "Non-Triangle"

### Second cycle
scnd_ext_tri_x1 <- nj_elec %>%
  filter(.,
         cycle == "Second",
         ext_tri == 1)
tri_b_x1 <- calc_range(scnd_ext_tri_x1$turnout)
tri_b_x1$Cycle <- 2
tri_b_x1$Condition <- "Extended Triangle"

scnd_non_ext_tri_x1 <- nj_elec %>%
  filter(.,
         cycle == "Second",
         ext_tri == 0)
non_tri_b_x1 <- calc_range(scnd_non_ext_tri_x1$turnout)
non_tri_b_x1$Cycle <- 2
non_tri_b_x1$Condition <- "Non-Triangle"

### Third cycle
thrd_ext_tri_x1 <- nj_elec %>%
  filter(.,
         cycle == "Third",
         ext_tri == 1)
tri_c_x1 <- calc_range(thrd_ext_tri_x1$turnout)
tri_c_x1$Cycle <- 3
tri_c_x1$Condition <- "Extended Triangle"

thrd_non_ext_tri_x1 <- nj_elec %>%
  filter(.,
         cycle == "Third",
         ext_tri == 0)
non_tri_c_x1 <- calc_range(thrd_non_ext_tri_x1$turnout)
non_tri_c_x1$Cycle <- 3
non_tri_c_x1$Condition <- "Non-Triangle"

# combining
par_trend_x1 <- rbind(tri_a_x1, non_tri_a_x1, tri_b_x1, non_tri_b_x1, tri_c_x1, non_tri_c_x1)
par_trend_all_x1 <- rbind(tri_0_x1, non_tri_0_x1, tri_a_x1, non_tri_a_x1, tri_b_x1,
                          non_tri_b_x1, tri_c_x1, non_tri_c_x1)


par_trend_all_x1$Condition <- factor(par_trend_all_x1$Condition,
                                     levels = c("Extended Triangle", "Non-Triangle"))

# plotting
ggplot(par_trend_all_x1, aes(x = Cycle, y = mean, color = Condition, shape = Condition)) +
  geom_line(size = 1.2) +
  geom_point(size = 1.2) +
  geom_errorbar(aes(ymin = low, ymax = high), width = 0, size = 1) +
  scale_x_continuous(breaks = 0:3,
                     labels = c("2015", "2019a", "2019b", "2020")) +
  geom_vline(xintercept = 2.2, linetype = "dotted", color = "grey", size = 0.5) +
  annotate("text", x = 1.5, y = .42, label = "PRE", family = "Times") +
  annotate("text", x = 2.5, y = .42, label = "POST", family = "Times") +
  scale_color_manual(values = c("firebrick3", "dodgerblue3")) +
  labs(x = "Election Cycle", y = "Turnout", color = "", shape = "") +
  # guides(colour = guide_legend(reverse = TRUE), shape = guide_legend(reverse = TRUE)) +
  theme(text = element_text(size = 10, family = "Times"),
        legend.key = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.text.x = element_text(size = 10),
        plot.caption = element_text(size = 10, family = "Times", hjust = -0.02),
        axis.line = element_line(colour = "black"))
ggsave("plots/BSS_figure_1.png", width = 6.5, height = 4)





### Event study
nj_elec_x1 <- nj_elec_all %>%
  mutate(event_time = as.numeric(as.factor(cycle)) - 2)  

nj_elec_all$cycle
nj_elec_x1$cycle <- factor(nj_elec_x1$cycle, labels = c("2015", "2019a", "2019b", "2020"), 
                           levels = c("Zero", "First", "Second", "Third"))

event_model_x1 <- feols(
  turnout ~ i(cycle, ext_tri, ref = '2019b') | lcode + cycle, 
  data = nj_elec_x1,
  cluster = "lcode"
)
summary(event_model_x1)
png("plots/BSS_figure_2.png", width = 6.5, height = 4, units = "in", res = 300)
iplot(event_model_x1,
      xlab = "Election cycle",
      ylab = "Effect on Turnout",
      main = "Event Study: Parallel Trends Check for the Extended Triangle",
      col = "firebrick3", ref.line = 3.5)
dev.off()





### TABLE 4 --- Regressions Turnout

# Model 1
# Their model tur_pop: 10 treated localities with population controls instead of fixed effects

# Model 2
# Our model tur_pop_16: 16 treated localities with population controls instead of fixed effects
tur_pop_16 <- nj_elec %>%
  mutate(triangle = ext_tri) %>%
  felm(turnout ~ triangle * post + pop_2018 | 0 | 0 | lcode,
       data = ., exactDOF = TRUE, keepCX = TRUE)
summary(tur_pop_16)

# Model 3
# Their model tur_pop_lfe: 10 treated localities with fixed effects instead of population controls

# Model 4:
# Their model ext_tri: 16 treated localities with fixed effects instead of population controls

# Model 5
# Our model only_six_with_ten: 6 (not-mentioned) treated localities, 10 mentioned ones included as controls
only_six_with_ten <- nj_elec %>%
  mutate(triangle = ifelse(ext_tri == 1 & triangle == 0, 1, 0)) %>%
  felm(turnout ~ triangle * post | cycle + lcode | 0 | lcode,
       data = ., exactDOF = TRUE, keepCX = TRUE)
summary(only_six_with_ten)

# Model 6
# Our model only_six_without_ten: 6 (not-mentioned) treated localities, 10 mentioned ones excluded from controls
only_six_without_ten <- nj_elec %>%
  mutate(triangle = ifelse(ext_tri == 1 & triangle == 0, 1, 0)) %>%
  filter(triangle == 1 | ext_tri == 0) %>%
  felm(turnout ~ triangle * post | cycle + lcode | 0 | lcode,
       data = ., exactDOF = TRUE, keepCX = TRUE)
summary(only_six_without_ten)


## STANDARD ERROR HANDLING ## 

models <- list(tur_pop, tur_pop_16, tur_pop_lfe, ext_tri, only_six_with_ten, only_six_without_ten)
all_coefs <- c("triangle", "post", "triangle:post")  # only keep the ones we display

se_list <- lapply(models, function(m) {
  se <- coef(summary(m))[,"Cluster s.e."]
  out <- setNames(rep(NA, length(all_coefs)), all_coefs)
  matched <- intersect(names(se), all_coefs)
  out[matched] <- se[matched]
  return(out)
})

## TABLE OUTPUT ##
stargazer(models,
          out = "tables/table4_BSS.tex",
          style = "qje",
          title = "Deal of the Century Effect on Turnout",
          omit = c("Constant", "pop_2018"),
          dep.var.labels = c("Turnout"),
          covariate.labels = c("Triangle", "Post", "Triangle * Post"),
          omit.stat = c("rsq", "f", "ser", "adj.rsq"),
          ci = FALSE,
          se = se_list,
          omit.table.layout = "n",
          column.sep.width = "4pt",
          label = "tab:main_did_all",
          add.lines = list(
            c("Pop Control",    "Yes", "Yes", "No", "No", "No", "No"),
            c("Cycle FE",       "No",  "No",  "Yes", "Yes", "Yes", "Yes"),
            c("Locality FE",    "No",  "No",  "Yes", "Yes", "Yes", "Yes"),
            c("Cluster",        rep("Locality", 6)),
            c("Sample",         "Non-Jew.", "Non-Jew.", "Non-Jew.", 
              "Non-Jew.", "Non-Jew.", "No 10"),
            c("Treatment",      "10 Loc.", "16 Loc.", "10 Loc.", 
              "16 Loc.", "6 Loc.", "6 Loc."),
            c("Pre-Register",   "Yes", "Yes", "No", "No", "No", "No")))









### Table 5 - Regressions Vote Shares

# Model 1
# Their model kl_vote_lfe: 10 treated Localities with fixed effects

# Model 2
# Our model kl_vote_lfe_16: 16 treated localities with fixed effects
kl_vote_lfe_16 <- nj_elec %>%
  mutate(triangle = ext_tri) %>%
  felm(kl_vs ~ triangle * post | lcode + cycle| 0 | lcode,
       data = ., exactDOF = TRUE, keepCX = TRUE)
summary(kl_vote_lfe_16)

# Model 3
# Our model kl_vote_lfe_only_six_with_ten: 6 (not-mentioned) treated localities, 10 mentioned ones included as controls
kl_vote_lfe_only_six_with_ten <- nj_elec %>%
  mutate(triangle = ifelse(ext_tri == 1 & triangle == 0, 1, 0)) %>%
  felm(kl_vs ~ triangle * post | cycle + lcode | 0 | lcode,
       data = ., exactDOF = TRUE, keepCX = TRUE)
summary(kl_vote_lfe_only_six_with_ten)

# Model 4
# Our model kl_vote_lfe_only_six_without_ten: 6 (not-mentioned) treated localities, 10 mentioned ones excluded from controls
kl_vote_lfe_only_six_without_ten <- nj_elec %>%
  mutate(triangle = ifelse(ext_tri == 1 & triangle == 0, 1, 0)) %>%
  filter(triangle == 1 | ext_tri == 0) %>%
  felm(kl_vs ~ triangle * post | cycle + lcode | 0 | lcode,
       data = ., exactDOF = TRUE, keepCX = TRUE)
summary(kl_vote_lfe_only_six_without_ten)


## TABLE OUTPUT ##
models_bluewhite <- list(kl_vote_lfe, kl_vote_lfe_16, kl_vote_lfe_only_six_with_ten, kl_vote_lfe_only_six_without_ten)

stargazer(models_bluewhite,
          out = "tables/table5_BSS.tex", 
          style="qje",
          title = "Deal of the Century Effect on Blue-White Vote Share",
          omit = c("Constant"),
          dep.var.labels = c("Blue-White", "Blue-White", "Blue-White", "Blue-White"),
          covariate.labels = c("Triangle", "Post", "Triangle * Post"),
          omit.stat = c("rsq", "f", "ser", "adj.rsq"),
          ci = FALSE,
          omit.table.layout = "n",
          column.sep.width = "25pt",
          label = "tab:party_did_all",
          add.lines = list(
            c("Cycle FE",  "Yes", "Yes", "Yes", "Yes"),
            c("Locality FE", "Yes", "Yes", "Yes", "Yes"),
            c("Cluster",  "Locality", "Locality", "Locality", "Locality"),
            c("Sample",  "Non-Jew.", "Non-Jew.", "Non-Jew.", "No 10"),
            c("Treatment", "10 Loc.", "16 Loc.", "6 Loc.", "6 Loc."),
            c("Pre-Register",  "No", "No", "No", "No")))








